mruby 4.0.0
mruby is the lightweight implementation of the Ruby language
Loading...
Searching...
No Matches
khash.h
Go to the documentation of this file.
1
6
7#ifndef MRUBY_KHASH_H
8#define MRUBY_KHASH_H
9
10#include <string.h>
11
12#include <mruby.h>
13#include "common.h"
14
19
20typedef uint32_t khint_t;
21typedef khint_t khiter_t;
22
23#ifndef KHASH_INITIAL_SIZE
24# define KHASH_INITIAL_SIZE 32
25#endif
26#define KHASH_MIN_SIZE 8
27#define KHASH_SMALL_LIMIT 4
28
29#define KH_UPPER_BOUND(x) ((x) - ((x)>>3)) /* 87.5% load factor */
30
31/* extern uint8_t __m[]; */
32
33/* mask for flags */
34static const uint8_t __m_empty[] = {0x02, 0x08, 0x20, 0x80};
35static const uint8_t __m_del[] = {0x01, 0x04, 0x10, 0x40};
36static const uint8_t __m_either[] = {0x03, 0x0c, 0x30, 0xc0};
37
38
39#define __ac_isempty(ed_flag, i) (ed_flag[(i)/4]&__m_empty[(i)%4])
40#define __ac_isdel(ed_flag, i) (ed_flag[(i)/4]&__m_del[(i)%4])
41#define __ac_iseither(ed_flag, i) (ed_flag[(i)/4]&__m_either[(i)%4])
42#define khash_power2(v) do { \
43 v--;\
44 v |= v >> 1;\
45 v |= v >> 2;\
46 v |= v >> 4;\
47 v |= v >> 8;\
48 v |= v >> 16;\
49 v++;\
50} while (0)
51#define khash_mask(h) ((h)->n_buckets-1)
52#define khash_upper_bound(h) (KH_UPPER_BOUND((h)->n_buckets))
53
54/* BREAKING CHANGE: khash structure optimized for 50% memory reduction
55 *
56 * The structure now uses a single data pointer instead of separate keys,
57 * vals, and ed_flags pointers, reducing size from 32 to 16 bytes.
58 *
59 * MIGRATION REQUIRED for field access macros:
60 * - OLD: kh_key(h, x) NEW: kh_key(typename, h, x)
61 * - OLD: kh_val(h, x) NEW: kh_val(typename, h, x)
62 * - OLD: kh_exist(h, x) NEW: kh_exist(typename, h, x)
63 * - OLD: KHASH_FOREACH() NEW: KHASH_FOREACH(typename, ...)
64 *
65 * Function-style macros (kh_get, kh_put, etc.) remain unchanged.
66 */
67
68/* declare struct kh_xxx and kh_xxx_funcs
69
70 name: hash name
71 khkey_t: key data type
72 khval_t: value data type
73 kh_is_map: (0: hash set / 1: hash map)
74*/
75#define KHASH_DECLARE(name, khkey_t, khval_t, kh_is_map) \
76 typedef struct kh_##name { \
77 void *data; /* Single allocation: [keys][vals][flags] */ \
78 khint_t n_buckets; /* Number of buckets (power of 2) */ \
79 khint_t size; /* Number of elements */ \
80 } kh_##name##_t; \
81 /* Address calculation functions for optimized memory layout */ \
82 static inline khkey_t* kh_keys_##name(const kh_##name##_t *h) { \
83 return (khkey_t*)(h)->data; \
84 } \
85 static inline khval_t* kh_vals_##name(const kh_##name##_t *h) { \
86 return kh_is_map ? \
87 (khval_t*)((uint8_t*)(h)->data + sizeof(khkey_t) * (h)->n_buckets) : NULL; \
88 } \
89 static inline uint8_t* kh_flags_##name(const kh_##name##_t *h) { \
90 return (uint8_t*)(h)->data + sizeof(khkey_t) * (h)->n_buckets + \
91 (kh_is_map ? sizeof(khval_t) * (h)->n_buckets : 0); \
92 } \
93 kh_##name##_t *kh_init_##name##_size(mrb_state *mrb, khint_t size); \
94 kh_##name##_t *kh_init_##name(mrb_state *mrb); \
95 void kh_destroy_##name(mrb_state *mrb, kh_##name##_t *h); \
96 void kh_clear_##name(mrb_state *mrb, kh_##name##_t *h); \
97 khint_t kh_get_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key); \
98 khint_t kh_put_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key, int *ret); \
99 void kh_resize_##name(mrb_state *mrb, kh_##name##_t *h, khint_t new_n_buckets); \
100 void kh_del_##name(mrb_state *mrb, kh_##name##_t *h, khint_t x); \
101 kh_##name##_t *kh_copy_##name(mrb_state *mrb, kh_##name##_t *h); \
102 void kh_init_data_##name(mrb_state *mrb, kh_##name##_t *h, khint_t size); \
103 void kh_destroy_data_##name(mrb_state *mrb, kh_##name##_t *h); \
104 void kh_replace_##name(mrb_state *mrb, kh_##name##_t *dst, const kh_##name##_t *src);
105
106/* define kh_xxx_funcs
107
108 name: hash name
109 khkey_t: key data type
110 khval_t: value data type
111 kh_is_map: (0: hash set / 1: hash map)
112 __hash_func: hash function
113 __hash_equal: hash comparison function
114*/
115#define KHASH_DEFINE(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
116 mrb_noreturn void mrb_raise_nomemory(mrb_state *mrb); \
117 /* Internal helper functions */ \
118 static inline size_t kh__kv_size_##name(khint_t count) { \
119 return sizeof(khkey_t) * count + \
120 (kh_is_map ? sizeof(khval_t) * count : 0); \
121 } \
122 static inline size_t kh__htable_size_##name(khint_t n_buckets) { \
123 return kh__kv_size_##name(n_buckets) + n_buckets / 4; \
124 } \
125 static inline void kh__mark_occupied_##name(kh_##name##_t *h, khint_t i) { \
126 uint8_t *flags = kh_flags_##name(h); \
127 flags[i/4] &= ~__m_either[i%4]; /* Clear both empty and deleted bits */ \
128 } \
129 static inline void kh__mark_deleted_##name(kh_##name##_t *h, khint_t i) { \
130 uint8_t *flags = kh_flags_##name(h); \
131 flags[i/4] |= __m_del[i%4]; /* Set deleted bit */ \
132 } \
133 static inline khint_t kh__key_idx_##name(mrb_state *mrb, khkey_t key, kh_##name##_t *h) { \
134 return __hash_func(mrb, key) & khash_mask(h); \
135 } \
136 static inline khint_t kh__next_probe_##name(khint_t k, khint_t *step, kh_##name##_t *h) { \
137 return (k+(++(*step))) & khash_mask(h); \
138 } \
139 static inline khint_t kh__insert_key_##name(kh_##name##_t *h, khint_t index, khkey_t key) { \
140 khkey_t *keys = kh_keys_##name(h); \
141 keys[index] = key; \
142 kh__mark_occupied_##name(h, index); \
143 h->size++; \
144 return index; \
145 } \
146 static inline void kh__clear_flags_##name(kh_##name##_t *h, khint_t n_buckets) { \
147 memset(kh_flags_##name(h), 0xaa, n_buckets/4); \
148 } \
149 static inline void kh__alloc_##name(mrb_state *mrb, kh_##name##_t *h) { \
150 khint_t sz = h->n_buckets; \
151 uint8_t *p = (uint8_t*)mrb_malloc(mrb, kh__htable_size_##name(sz)); \
152 h->size = 0; \
153 h->data = p; /* Single data pointer for optimized layout */ \
154 kh__clear_flags_##name(h, sz); \
155 } \
156 /* Small table optimization functions */ \
157 static inline int kh__is_small_##name(const kh_##name##_t *h) { \
158 return h->n_buckets == 0; /* Small table marker */ \
159 } \
160 static inline khint_t kh__get_small_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key) { \
161 khkey_t *keys = kh_keys_##name(h); \
162 for (khint_t i = 0; i < h->size; i++) { \
163 if (__hash_equal(mrb, keys[i], key)) return i; \
164 } \
165 return h->size; /* Not found - return end position */ \
166 } \
167 static inline void kh__rebuild_##name(mrb_state *mrb, kh_##name##_t *h, khint_t new_n_buckets) { \
168 kh_##name##_t hh; \
169 hh.data = NULL; \
170 hh.size = 0; \
171 kh_init_data_##name(mrb, &hh, new_n_buckets); \
172 /* Rehash from old 'h' to 'hh' */ \
173 khkey_t *old_keys = kh_keys_##name(h); \
174 khval_t *old_vals = kh_vals_##name(h); \
175 uint8_t *old_flags = kh__is_small_##name(h) ? NULL : kh_flags_##name(h); \
176 khint_t limit = old_flags ? h->n_buckets : h->size; \
177 for (khint_t i = 0; i < limit; i++) { \
178 if (old_flags && __ac_iseither(old_flags, i)) continue; \
179 khint_t k = kh_put_##name(mrb, &hh, old_keys[i], NULL); \
180 if (kh_is_map) { \
181 kh_val(name, &hh, k) = old_vals[i]; \
182 } \
183 } \
184 /* Final Swap */ \
185 mrb_free(mrb, h->data); \
186 h->data = hh.data; \
187 h->n_buckets = hh.n_buckets; \
188 h->size = hh.size; \
189 } \
190 static inline khint_t kh__put_small_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key, int *ret) { \
191 /* First check if key exists */ \
192 khint_t pos = kh__get_small_##name(mrb, h, key); \
193 if (pos < h->size) { \
194 if (ret) *ret = 0; /* Key exists */ \
195 return pos; \
196 } \
197 /* Check if we need to convert to hash table */ \
198 if (h->size >= KHASH_SMALL_LIMIT) { \
199 /* Convert from small table to hash table */ \
200 kh__rebuild_##name(mrb, h, KHASH_MIN_SIZE); \
201 /* Now add the new key using regular hash table */ \
202 return kh_put_##name(mrb, h, key, ret); \
203 } \
204 /* Add new element to small table */ \
205 khkey_t *keys = kh_keys_##name(h); \
206 keys[h->size] = key; \
207 h->size++; \
208 if (ret) *ret = 1; /* New key */ \
209 return h->size - 1; \
210 } \
211 kh_##name##_t *kh_init_##name##_size(mrb_state *mrb, khint_t size) { \
212 kh_##name##_t *h = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \
213 kh_init_data_##name(mrb, h, size); \
214 return h; \
215 } \
216 kh_##name##_t *kh_init_##name(mrb_state *mrb) { \
217 return kh_init_##name##_size(mrb, KHASH_INITIAL_SIZE); \
218 } \
219 void kh_destroy_##name(mrb_state *mrb, kh_##name##_t *h) \
220 { \
221 kh_destroy_data_##name(mrb, h); \
222 mrb_free(mrb, h); \
223 } \
224 void kh_clear_##name(mrb_state *mrb, kh_##name##_t *h) \
225 { \
226 (void)mrb; \
227 if (h && h->data) { \
228 kh__clear_flags_##name(h, h->n_buckets); \
229 h->size = 0; \
230 } \
231 } \
232 khint_t kh_get_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key) \
233 { \
234 if (kh__is_small_##name(h)) { \
235 return kh__get_small_##name(mrb, h, key); \
236 } \
237 /* Cache calculated pointers for performance */ \
238 khkey_t *keys = kh_keys_##name(h); \
239 uint8_t *ed_flags = kh_flags_##name(h); \
240 khint_t k = kh__key_idx_##name(mrb, key, h), step = 0; \
241 (void)mrb; \
242 while (!__ac_isempty(ed_flags, k)) { \
243 if (!__ac_isdel(ed_flags, k)) { \
244 if (__hash_equal(mrb, keys[k], key)) return k; \
245 } \
246 k = kh__next_probe_##name(k, &step, h); \
247 } \
248 return kh_end(h); \
249 } \
250 void kh_resize_##name(mrb_state *mrb, kh_##name##_t *h, khint_t new_n_buckets) \
251 { \
252 if (new_n_buckets < KHASH_MIN_SIZE) \
253 new_n_buckets = KHASH_MIN_SIZE; \
254 khash_power2(new_n_buckets); \
255 kh__rebuild_##name(mrb, h, new_n_buckets); \
256 } \
257 khint_t kh_put_##name(mrb_state *mrb, kh_##name##_t *h, khkey_t key, int *ret) \
258 { \
259 if (kh__is_small_##name(h)) { \
260 return kh__put_small_##name(mrb, h, key, ret); \
261 } \
262 khint_t k, del_k, step = 0; \
263 if (h->size >= khash_upper_bound(h)) { \
264 kh_resize_##name(mrb, h, h->n_buckets*2); \
265 } \
266 /* Cache calculated pointers for performance */ \
267 khkey_t *keys = kh_keys_##name(h); \
268 uint8_t *ed_flags = kh_flags_##name(h); \
269 k = kh__key_idx_##name(mrb, key, h); \
270 del_k = kh_end(h); \
271 while (!__ac_isempty(ed_flags, k)) { \
272 if (!__ac_isdel(ed_flags, k)) { \
273 if (__hash_equal(mrb, keys[k], key)) { \
274 if (ret) *ret = 0; \
275 return k; \
276 } \
277 } \
278 else if (del_k == kh_end(h)) { \
279 del_k = k; \
280 } \
281 k = kh__next_probe_##name(k, &step, h); \
282 } \
283 if (del_k != kh_end(h)) { \
284 /* put at del */ \
285 kh__insert_key_##name(h, del_k, key); \
286 if (ret) *ret = 2; \
287 return del_k; \
288 } \
289 else { \
290 /* put at empty */ \
291 kh__insert_key_##name(h, k, key); \
292 if (ret) *ret = 1; \
293 return k; \
294 } \
295 } \
296 void kh_del_##name(mrb_state *mrb, kh_##name##_t *h, khint_t x) \
297 { \
298 (void)mrb; \
299 if (kh__is_small_##name(h)) { \
300 /* Small table deletion: shift elements down */ \
301 mrb_assert(x < h->size); \
302 khkey_t *keys = kh_keys_##name(h); \
303 khval_t *vals = kh_vals_##name(h); \
304 for (khint_t i = x; i < h->size - 1; i++) { \
305 keys[i] = keys[i + 1]; \
306 if (kh_is_map) vals[i] = vals[i + 1]; \
307 } \
308 h->size--; \
309 } \
310 else { \
311 /* Regular hash table deletion */ \
312 mrb_assert(x != h->n_buckets && !__ac_iseither(kh_flags_##name(h), x)); \
313 kh__mark_deleted_##name(h, x); \
314 h->size--; \
315 } \
316 } \
317 kh_##name##_t *kh_copy_##name(mrb_state *mrb, kh_##name##_t *h) \
318 { \
319 kh_##name##_t *h2 = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \
320 kh_replace_##name(mrb, h2, h); \
321 return h2; \
322 } \
323 void kh_init_data_##name(mrb_state *mrb, kh_##name##_t *h, khint_t size) { \
324 if (size <= KHASH_SMALL_LIMIT) { \
325 /* Start as small table */ \
326 h->n_buckets = 0; /* Small table marker */ \
327 h->data = mrb_malloc(mrb, kh__kv_size_##name(KHASH_SMALL_LIMIT)); \
328 h->size = 0; \
329 } \
330 else { \
331 /* Start as regular hash table */ \
332 if (size < KHASH_MIN_SIZE) \
333 size = KHASH_MIN_SIZE; \
334 khash_power2(size); \
335 h->n_buckets = size; \
336 kh__alloc_##name(mrb, h); \
337 } \
338 } \
339 void kh_destroy_data_##name(mrb_state *mrb, kh_##name##_t *h) \
340 { \
341 if (h && h->data) { \
342 mrb_free(mrb, h->data); /* Free only the data allocation */ \
343 h->data = NULL; \
344 } \
345 } \
346 void kh_replace_##name(mrb_state *mrb, kh_##name##_t *dst, const kh_##name##_t *src) \
347 { \
348 if (!src || (src->n_buckets == 0 && src->size == 0)) { \
349 /* Empty source */ \
350 kh_destroy_data_##name(mrb, dst); \
351 dst->data = NULL; \
352 dst->n_buckets = 0; \
353 dst->size = 0; \
354 } \
355 else if (src->n_buckets == 0) { \
356 /* Small table case */ \
357 size_t data_size = kh__kv_size_##name(KHASH_SMALL_LIMIT); \
358 dst->data = mrb_realloc(mrb, dst->data, data_size); \
359 dst->size = src->size; \
360 dst->n_buckets = 0; \
361 /* Copy only the used portion of keys and values */ \
362 size_t copy_size = kh__kv_size_##name(src->size); \
363 memcpy(dst->data, src->data, copy_size); \
364 } \
365 else { \
366 /* Regular hash table case */ \
367 size_t data_size = kh__htable_size_##name(src->n_buckets); \
368 dst->data = mrb_realloc(mrb, dst->data, data_size); \
369 dst->size = src->size; \
370 dst->n_buckets = src->n_buckets; \
371 /* Copy the entire data block: [keys][vals][flags] */ \
372 memcpy(dst->data, src->data, data_size); \
373 } \
374 }
375
376
377#define khash_t(name) kh_##name##_t
378
379#define kh_init_size(name,mrb,size) kh_init_##name##_size(mrb,size)
380#define kh_init(name,mrb) kh_init_##name(mrb)
381#define kh_destroy(name, mrb, h) kh_destroy_##name(mrb, h)
382#define kh_clear(name, mrb, h) kh_clear_##name(mrb, h)
383#define kh_resize(name, mrb, h, s) kh_resize_##name(mrb, h, s)
384#define kh_put(name, mrb, h, k) kh_put_##name(mrb, h, k, NULL)
385#define kh_put2(name, mrb, h, k, r) kh_put_##name(mrb, h, k, r)
386#define kh_get(name, mrb, h, k) kh_get_##name(mrb, h, k)
387#define kh_del(name, mrb, h, k) kh_del_##name(mrb, h, k)
388#define kh_copy(name, mrb, h) kh_copy_##name(mrb, h)
389#define kh_init_data(name, mrb, h, size) kh_init_data_##name(mrb, h, size)
390#define kh_destroy_data(name, mrb, h) kh_destroy_data_##name(mrb, h)
391#define kh_replace(name, mrb, dst, src) kh_replace_##name(mrb, dst, src)
392
393/* BREAKING CHANGE: Field access macros now require type name as first parameter
394 * The macros keep their familiar names but now need the hash type name.
395 *
396 * MIGRATION: Add type name as first parameter:
397 * kh_key(h, x) -> kh_key(typename, h, x)
398 * kh_val(h, x) -> kh_val(typename, h, x)
399 * kh_exist(h, x) -> kh_exist(typename, h, x)
400 * kh_value(h, x) -> kh_value(typename, h, x)
401 */
402
403/* Type-aware access macros - same names, now with type parameter */
404#define kh_exist(name, h, x) ((h)->n_buckets == 0 ? ((x) < (h)->size) : (!__ac_iseither(kh_flags_##name(h), (x))))
405#define kh_key(name, h, x) (kh_keys_##name(h)[x])
406#define kh_val(name, h, x) (kh_vals_##name(h)[x])
407#define kh_value(name, h, x) (kh_vals_##name(h)[x])
408#define kh_begin(h) (khint_t)(0)
409#define kh_end(h) ((h)->n_buckets == 0 ? (h)->size : (h)->n_buckets)
410#define kh_is_end(h, i) ((i) >= kh_end(h))
411#define kh_size(h) ((h)->size)
412#define kh_n_buckets(h) ((h)->n_buckets)
413
414#define kh_int_hash_func(mrb,key) mrb_int_hash_func(mrb,key)
415#define kh_int_hash_equal(mrb,a, b) (a == b)
416#define kh_int64_hash_func(mrb,key) (khint_t)((key)>>33^(key)^(key)<<11)
417#define kh_int64_hash_equal(mrb,a, b) (a == b)
418static inline khint_t __ac_X31_hash_string(const char *s)
419{
420 khint_t h = *s;
421 if (h) for (++s; *s; ++s) h = (h << 5) - h + *s;
422 return h;
423}
424#define kh_str_hash_func(mrb,key) __ac_X31_hash_string(key)
425#define kh_str_hash_equal(mrb,a, b) (strcmp(a, b) == 0)
426
427typedef const char *kh_cstr_t;
428
430
446/* BREAKING CHANGE: KHASH_FOREACH now requires type name as first parameter
447 * OLD: KHASH_FOREACH(mrb, kh, k)
448 * NEW: KHASH_FOREACH(name, kh, k)
449 */
450#define KHASH_FOREACH(name, kh, k) \
451 if (kh) \
452 for (khiter_t k = kh_begin(kh); !kh_is_end(kh, k); k++) \
453 if (kh_exist(name, kh, k))
454
455#endif /* MRUBY_KHASH_H */
mruby common platform definition"
#define MRB_END_DECL
End declarations in C mode.
Definition common.h:28
#define MRB_BEGIN_DECL
Start declarations in C mode.
Definition common.h:26
uint32_t khint_t
khash definitions used in mruby's hash table.
Definition khash.h:20
String class